/*
Input: $tmp/wl_sb_assignments_raw [Polling location - precinct assignments across all elections]

Output: $tmp/wl_sb_assignments_clean.dta

Tasks:
	Merge geocodes
	Harmonize polling place (PP) identifier, e.g. when same type (e.g., "Grundschule") but slightly different address
		> also adjust geocodes accordingly (always take the geocode from the first occurrence)
			wahllokal_id := Raw-identifier of PP based on reported address
	GEN:	wl_id := harmonized PP identifier used in final dataset
	GEN: 	wl_typ:= harmonized PP description used in final dataset

*/

*** 1) Harmonize Polling place IDs and description across different elections
*	This creates a list of unique PPs (wahllokal_id) and their harmonized IDs (wl_id)
// E.g., "Alten- und Servicezentrum Haidhausen" has address Wolfgangstr. 15 in SE-18 and Wolfgangstr. 18 in FE-17 => but same building

** LOAD: polling location assignments, one row per precinct (sb) and election (wahl_id)
use "$tmp/wl_sb_assignments_raw", clear
	isid wahl_id sb

 * ATTACH the geocodes of each raw polling place (every row on both sides must match)
	merge m:1 wahllokal_id using "$tmp/wl_unique_geocoded.dta", assert(match) nogenerate
	isid wahl_id sb	// the m:1 merge must leave the precinct-election key intact

	// give the coordinates their final names and labels, make them numeric,
	// then discard every remaining g_* variable delivered by the geocoding file
	rename 			g_lat 	lat_wl
	rename 			g_lon 	lon_wl
	label variable 	lat_wl 	"latitude of polling location"
	label variable 	lon_wl 	"longitude of polling location"
	destring 		lat_wl 	lon_wl, replace
	drop g_*

 * HARMONIZE the PP description across elections: one wl_typ per raw PP ID
	// the longest description reported for an ID is taken as the most detailed one;
	// among equally long descriptions the one sorting last is used
	generate tmp_nchar = strlen(wahllokal_type)
	sort wahllokal_id tmp_nchar wahllokal_type
	by wahllokal_id: generate wl_typ = wahllokal_type[_N]
	label variable wl_typ "Description of PP (harmonized over different elections)"
	drop tmp*

 * REDUCE to one row per polling place and election
	// precinct ID and raw description are no longer needed at this level
	duplicates drop wahl_id wahllokal_id, force
	drop sb wahllokal_type

 *** HARMONIZE polling place ID across elections: gen wl_id
	// wl_id starts as a copy of the raw ID and is overwritten below where needed
	generate wl_id = wahllokal_id
	label variable wahllokal_id "Raw ID of PP based on reported address"
	label variable wl_id 		"Harmonized PP ID"

	// one row per raw polling place: the row with the lowest wahl_id
	// (intended to be the earliest election)
	isid wahllokal_id wahl_id
	bysort wahllokal_id (wahl_id): drop if _n > 1

	// Manual recodes for PPs with the same description but a slightly different address.
	// E.g., Alten- und Servicezentrum Haidhausen has address Wolfgangstr. 15 in LTW18
	// and Wolfgangstr. 18 in BTW17 => but same building.
	// Each pair reads "<raw ID> <harmonized ID>".
	foreach wl_pair in ///
		"wolfgangstrasse18 wolfgangstrasse15" ///
		"freseniusstrasse47 freseniusstrasse45" ///
		"grandlstrasse12 grandlstrasse5" ///
		"barlachstrasse28 barlachstrasse26" ///
		"bergamlaimstrasse142 bergamlaimstrasse140" ///
		"dieselstrasse14 dieselstrasse12" ///
		"dompedroplatz5 dompedroplatz2" ///
		"eduardsprangerstrasse17 eduardsprangerstrasse15" ///
		"forellenstrasse5 forellenstrasse1" ///
		"musenbergstrasse32 musenbergstrasse30" ///
		"pfanzeltplatz10 pfanzeltplatz5" ///
		"sanktannastrasse22 sanktannastrasse20" ///
		"markgrafenstrasse31 markgrafenstrasse33" ///
		"karwendelstrasse39 engelhardstrasse26" {
		local wl_old : word 1 of `wl_pair'
		local wl_new : word 2 of `wl_pair'
		replace wl_id = "`wl_new'" if wl_id == "`wl_old'"
	}

	// Within each harmonized ID, description and coordinates are copied from the
	// first row in (wahl_id, wahllokal_id) order, i.e. from the earliest election
	sort wl_id wahl_id wahllokal_id
	foreach wl_var of varlist wl_typ lat_wl lon_wl {
		by wl_id: replace `wl_var' = `wl_var'[1]
	}

	// the result is a lookup table keyed by the raw PP ID
	drop wahl_id wahl
	isid wahllokal_id

 ** EXPAND back to the full PP-precinct assignment via the raw PP ID
	// NOTE(review): for variables present in both datasets this merge keeps the
	// master (lookup) values; confirm that only variables dropped below are affected
	merge 1:m wahllokal_id using "$tmp/wl_sb_assignments_raw.dta", assert(match) nogenerate

	// from here on wl_id identifies the PP: remove the raw ID, the raw description
	// and the remaining redundant PP variables
	drop wahllokal_id wahllokal_type strasse_wl nummer_wl

 * SAVE: Correspondence of (harmonized) PP ID to precinct across elections
	isid sb wahl_id
	order sb wl_id wahl_id wahl
	compress
	save "$tmp/wl_sb_assignments_clean.dta", replace
